import requests
from lxml import etree
# Page to scrape.
url = "http://www.boxofficecn.com/boxoffice2015"

# HTTP request headers passed along with the GET for `url`.
# NOTE(review): these look like they were captured from a desktop Chrome
# session (including the cookie values) -- confirm they are still required.
headers = {
    "accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.7"
    ),
    "accept-encoding": "gzip, deflate",
    "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
    "cache-control": "no-cache",
    "cookie": (
        "Hm_lvt_b6d45668276623ae0dd56fcf7dad2ead=1761810270; "
        "HMACCOUNT=E5B64E4D33FFD801; "
        "Hm_lpvt_b6d45668276623ae0dd56fcf7dad2ead=1761810339"
    ),
    "host": "www.boxofficecn.com",
    "pragma": "no-cache",
    "proxy-connection": "keep-alive",
    "upgrade-insecure-requests": "1",
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/141.0.0.0 Safari/537.36"
    ),
}

def _first_text(node, path):
    """Return the first result of evaluating XPath *path* on *node*, or None if it matches nothing."""
    found = node.xpath(path)
    return found[0] if found else None


# Fetch the page. requests waits indefinitely unless a timeout is given, so
# set one to keep a stalled connection from hanging the script.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx rather than parsing an error page into an empty file.
response.raise_for_status()
# When the server declares no charset, requests falls back to ISO-8859-1 for
# text responses, which garbles non-Latin text; use the detected encoding then.
if response.encoding is None or response.encoding.lower() == 'iso-8859-1':
    response.encoding = response.apparent_encoding

page = etree.HTML(response.text)
# The first matched row is dropped (presumably the table header -- confirm).
trs = page.xpath('//table/tbody/tr')[1:]

# Build every output line before touching the file, so a parsing problem
# cannot leave a half-written file behind.
lines = []
skipped = 0
for tr in trs:
    num = _first_text(tr, './td[1]/text()')
    year = _first_text(tr, './td[2]/text()')
    name = _first_text(tr, './td[3]//a/text()')
    # NOTE(review): the fourth cell (presumably the box-office figure) is not
    # part of the output format; add it here if the file should carry it.
    if num is None or year is None or name is None:
        # A row missing any of the three cells cannot be written in the
        # "num:year,name" format; skip it instead of raising IndexError.
        skipped += 1
        continue
    lines.append(f'{num}:{year},{name}\n')

# One line per movie: "<num>:<year>,<name>".
with open('./static/中国电影统计/movie.csv', 'w', encoding='utf-8') as f:
    f.writelines(lines)

if skipped:
    # Report dropped rows so missing data does not go unnoticed.
    print(f'skipped {skipped} row(s) with missing cells')